* Begin log file
* (capture: ignore the error raised when no log named sublog is currently open)
capture log close sublog
* Plain-text log under $projdir/log; replace overwrites the log from any earlier run
log using "$projdir/log/1_qcew_synth.txt", name(sublog) text replace

*-------------------------------------------------------------------------------
* Description: Estimate the uncorrected, bias-corrected, and pandemic-corrected
* synthetic control results for individual counties for CA and NY
* Author: Justin Wiltshire (edited by Denis Sosinskiy)
*
* Updated: May 10, 2024
*-------------------------------------------------------------------------------
clear

*---------------------------------------------------------------------------
* California
*---------------------------------------------------------------------------
* Create the output folders for each outcome and erase the .dta results left
* over from earlier runs, so stale tr_*.dta files are never mixed with new ones.
foreach y in employment avg_wkly_wage {
	local n = 722513
	local outdir "$projdir/dta/analysis/qcew/`y'/naics`n'/California"

	* Ensure necessary folders exist. Each level is created in turn; capture
	* ignores the error raised when a folder is already there.
	capture mkdir "$projdir/dta/analysis/qcew"
	capture mkdir "$projdir/dta/analysis/qcew/`y'"
	capture mkdir "$projdir/dta/analysis/qcew/`y'/naics`n'"
	capture mkdir "`outdir'"
	capture mkdir "`outdir'/corrected"

	* Clear the old .dta files (if any), first in the main folder and then in
	* the "corrected" folder
	di "Erasing previously saved outcome data"
	di "..."
	di
	foreach d in "`outdir'" "`outdir'/corrected" {
		local filelist: dir "`d'" files "*.dta"
		* Compound quotes: the file names in `filelist' are themselves quoted
		di `"Filelist: `filelist'"'
		foreach f of local filelist {
			qui erase "`d'/`f'"
		}
	}
}

* Create tempfiles (scratch datasets reused throughout the California section)
tempfile core core0 core1 core2 core3 core4

* Set start and end quarters. tm is formatted %tq below, so these are Stata
* quarterly dates (quarters since 1960q1).
local trp = 218                            // treatment quarter passed to trperiod(): 2014q3
local start_tm = 199                       // first quarter kept: 2009q4
local end_tm = 251                         // last quarter kept: 2022q4
local tot_tm = `end_tm' - `start_tm' + 1   // quarters a unit needs for a balanced panel
local endqtrlab "2022Q4"

* Ensure distinct package is installed. Only contact SSC when the command is
* actually missing, and let a failed install stop the run here rather than at
* the first call to distinct.
capture which distinct
if _rc {
	ssc install distinct
}

* Load the data
use "$projdir/dta/build/cln/analysis_panel_cty.dta", clear

* Rename key variables:
* quarterly_date -> tm, earnings -> demean_lnearn, lnemp -> demean_lnemp, ind -> naics
qui rename (quarterly_date earnings lnemp ind) (tm demean_lnearn demean_lnemp naics)
format tm %tq

* Generate unique county id and drop obs which don't observe it
* (obs with missing or zero wages or employment are dropped as well)
qui gen cty_fips = 1000*statefips + countyfips
qui keep if !mi(cty_fips) & !mi(avg_wkly_wage) & !mi(employment) & avg_wkly_wage != 0 & employment != 0
* Sanity checks on the filter above: ids, wages and employment are nonzero and nonmissing
assert cty_fips
assert avg_wkly_wage
assert employment
assert !mi(avg_wkly_wage)
assert !mi(employment)

* Recode the older restaurant NAICS code 722211 to 722513 in observations with
* year <= 2011 (i.e. through 2011, not only pre-2011)
qui recode naics (722211=722513) if year <= 2011

* Keep NAICS 10, 722, and 722513
qui keep if inlist(naics, 10, 722, 722513)

* Sort
sort cty_fips tm naics

* Restrict to desired time-period
keep if inrange(tm, `start_tm', `end_tm')

* Identify treated and donor counties: treated = every county with statefips == 6
levelsof cty_fips if statefips == 6, local(trcty)
* Comma-separated copy of the treated list, for use inside inlist()
local trctylist = subinstr("`trcty'", " ", ",",.)
qui distinct cty_fips if inlist(cty_fips, `trctylist')
local trcty_count = r(ndistinct)

* Generate donor variable (1 = not in the treated list)
qui gen donor = !inlist(cty_fips, `trctylist')

* Restrict to donor counties with no min wage changes (and the treated counties)
* A county has "no change" when loc_max_mw has the same min and max over its obs.
qui egen double max_loc_max = max(loc_max_mw), by(cty_fips)
qui egen double min_loc_max = min(loc_max_mw), by(cty_fips)
* x flags NAICS 722 obs with min_emp >= 5000; empkeep (below) spreads it to the county
* NOTE(review): a missing min_emp also satisfies >= 5000 in Stata - confirm min_emp is never missing
qui gen x = (naics == 722 & min_emp >= 5000)
* County 13121 is excluded outright (the New York section notes a raw earnings data issue)
qui drop if cty_fips == 13121

*Drop San Luis Obispo
qui drop if cty_fips == 6079 
bysort cty_fips: egen empkeep = max(x)
qui keep if (max_loc_max == min_loc_max | donor == 0) & empkeep == 1
* When the sample runs past tm 239 (2019q4), drop all counties in state FIPS 25 and 51
* NOTE(review): the reason for excluding these two states is not documented here
if `end_tm' > 239 {
	qui drop if inlist(floor(cty_fips/1000), 25, 51)
}

* Store the restricted county panel so it can be collapsed in two different ways
quietly compress
quietly save "`core'", replace

* Pass 1: apply the county change adjustments, then take the employment-weighted
* mean of avg_wkly_wage (and of donor, year, qtr) by county, quarter and industry
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (mean) avg_wkly_wage donor year qtr [aweight = employment], by(cty_fips tm naics)
* donor must still be exactly 0 or 1 after averaging
assert inlist(donor, 0, 1)
quietly save "`core2'", replace

* Pass 2: start again from the stored panel, apply the same adjustments, and
* total employment and pop10 over the same cells
quietly use "`core'", clear
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (sum) employment pop10, by(cty_fips tm naics)

* Bring the two passes together
quietly merge 1:1 cty_fips tm naics using "`core2'", nogenerate noreport

* Balanced panel only: a county-industry must be observed in every quarter
* between `start_tm' and `end_tm'
quietly keep if inrange(tm, `start_tm', `end_tm')
tempvar n_obs
by cty_fips naics, sort: generate `n_obs' = _N
quietly keep if `n_obs' == `tot_tm'
quietly drop `n_obs'

* Attach the LAUS data by county-year, keeping matched observations only
quietly merge m:1 cty_fips year using "$projdir/dta/build/cln/laus_cty_NYCnocombine.dta", nogenerate noreport keep(3)

* Attach the Covid-impact index: park the panel, build a county-level index
* file (average of workplace and retail), then merge it back in
quietly save "`core'", replace
quietly use "$projdir/dta/build/cln/covid_index.dta", clear
quietly rename countyfips cty_fips
quietly generate covid_index = (workplace + retail)/2
quietly keep cty_fips covid_index
quietly save "`core2'", replace
quietly use "`core'", clear
merge m:1 cty_fips using "`core2'", nogenerate noreport keep(3)

* Copy NAICS 10 employment onto every industry row of the same county-quarter
* as emp10, then discard the NAICS 10 rows themselves
tempvar emp_all
quietly generate `emp_all' = employment if naics == 10
by cty_fips tm, sort: egen emp10 = max(`emp_all')
quietly drop `emp_all'
quietly drop if naics == 10

* Retain only the analysis variables and store the result
quietly keep cty_fips tm naics avg_wkly_wage employment emp10 pop10 unr covid_index donor
quietly save "`core0'", replace

* For each of 722 and 722513: normalize the outcomes, build the predictors used
* for the correction, residualize the outcomes quarter by quarter, and stack
* the results for both industries in `core'.
local base_tm = `trp' - 1   // normalization quarter = quarter before treatment (217 = 2014q2)
local pre_end = 207         // last quarter of the predictor window (2011q4)
local stacked = 0           // becomes 1 once an industry has been saved to `core'
foreach n in 722 722513 {
	
	* Load and restrict
	qui use "`core0'", clear
	qui keep if naics == `n'

	* Put in % of 2014q2 levels (each county's own value in `base_tm' = 100)
	foreach v in employment avg_wkly_wage emp10 {
		qui gen x = `v' if tm == `base_tm'
		qui bysort cty_fips: egen xx = max(x)
		qui replace `v' = 100*`v'/xx
		qui drop x xx
	}

	* Reshape data to debias it: for every quarter of the predictor window,
	* spread that quarter's value to all rows of the county (emp_t, earn_t, emp10_t)
	local predvars ""
	forval t = `start_tm'/`pre_end' {
		foreach pair in "employment emp" "avg_wkly_wage earn" "emp10 emp10" {
			local src  : word 1 of `pair'
			local stub : word 2 of `pair'
			qui gen x = `src' if tm == `t'
			qui bysort cty_fips: egen `stub'_`t' = max(x)
			qui drop x
			local predvars "`predvars' `stub'_`t'"
		}
	}
	* County means over the same window, again spread to all rows
	foreach v in employment avg_wkly_wage emp10 unr {
		qui bysort cty_fips: egen x = mean(`v') if inrange(tm, `start_tm', `pre_end')
		qui bysort cty_fips: egen mean_`v' = max(x)
		qui drop x
		local predvars "`predvars' mean_`v'"
	}

	* "Covid- and bias-correct" the outcomes of interest: in each quarter,
	* regress the outcome on the predictors and covid_index using donor counties
	* only, then store the residual for every county (treated ones included,
	* since predict is not restricted to the estimation sample)
	qui gen p_employment = .
	qui gen p_avg_wkly_wage = .
	qui levelsof tm, local(Time)
	foreach v in employment avg_wkly_wage {
		foreach t of local Time {
			qui reg `v' `predvars' covid_index [aw=pop10] if tm == `t' & donor == 1
			qui predict p_`v'_`t', resid
			qui replace p_`v' = p_`v'_`t' if tm == `t'
			qui drop p_`v'_`t'
		}
	}
		
	* Keep key variables
	qui keep cty_fips tm naics employment avg_wkly_wage p_* emp10 pop10 unr donor
		
	* Compress, sort, and save; from the second industry on, append what has
	* already been stored so `core' ends up holding every industry
	qui compress
	qui sort cty_fips tm
	if `stacked' {
		qui append using "`core'"
	}
	qui save "`core'", replace
	local stacked = 1
}

* Get the synthetic control weights for each treated county.
* For every treated county c and outcome y this (1) runs allsynth4 on county c
* plus the donor pool and keeps its output in California/tr_c.dta, then
* (2) applies the positive donor weights from that output to the corrected
* outcome p_y and saves treated vs. synthetic series in California/corrected/tr_c.dta.
qui use "`core'", clear
qui levelsof cty_fips if donor == 0, local(trcty)

foreach c of local trcty {
	foreach y in employment avg_wkly_wage {
		local n = 722513 
			
		* Re-load the data
		qui use "`core'", clear
						
		* Restrict to selected sample of counties observed for NAICS `n', and ensure a balanced panel
		qui keep if naics == `n'
		qui keep if cty_fips == `c' | donor == 1
		bysort cty_fips: gen N = _N
		qui keep if N == `tot_tm'
		drop N
		qui distinct cty_fips if donor == 1
		local donorct = r(ndistinct)
		qui distinct cty_fips if inlist(cty_fips, `trctylist')
		di "Balanced panel exists for `r(ndistinct)' of `trcty_count' treated counties and `donorct' donor pool counties"
						
		* Create population-based weights for the averaging
		* NOTE(review): popwt is not referenced again in this file; kept in case allsynth4 relies on it
		qui gen x = pop10
		bysort cty_fips: egen popwt = max(x)
					
		* Create the list of pre-treatment outcome period predictors: one term
		* per quarter from `start_tm' to `predend' for the outcome and for emp10,
		* plus the emp10 term over the whole window
		local predend = 207
		local normemp "emp10"
		local ypreds
		local emp10preds
		forval t = `start_tm'/`predend' {
			local ypreds "`ypreds' `y'(`t')"
			local emp10preds "`emp10preds' emp10(`t')"
		}
		local emp10preds "`emp10preds' emp10(`start_tm'(1)`predend')"
	
		* tsset
		format tm %tq
		tsset cty_fips tm, quarterly
		qui gen treated = (donor == 0) 
		* Treatment quarter for the treated county, 0 for donors
		qui gen trp = `trp'*treated
					
		#delimit ;
			allsynth4
				`y' `ypreds' `y'(`start_tm'(1)`predend') unr(`start_tm'(4)`predend') `emp10preds', 
				trunit(`c') trperiod(`trp') 
				bcorrect(merge) 
				transform(`y' `normemp', normalize)
				pvalues(rmspe)
				keep($projdir/dta/analysis/qcew/`y'/naics`n'/California/tr_`c') replace;
			#delimit cr
						
		* Load the estimates data
		qui use "$projdir/dta/analysis/qcew/`y'/naics`n'/California/tr_`c'", clear
					
		* Get the weights: one row per (unit, donor) pair with a positive weight.
		* After the renames, county_fips is the unit being synthesized and
		* cty_fips is the donor county carrying the weight.
		qui keep cty_fips _Co_Number _W_Weight
		qui keep if !mi(_W_Weight) & _W_Weight > 0
		qui rename cty_fips county_fips
		qui rename _Co_Number cty_fips
		* Each donor may appear at most once per unit
		isid county_fips cty_fips
		qui save "`core2'", replace

		* Outcome panel for NAICS `n'
		qui use "`core'", clear
		qui keep if naics == `n'
		qui keep cty_fips tm `y' p_`y'
		qui save "`core3'", replace

		* Pair every (unit, donor) weight with that donor's full time series.
		* joinby keeps matched pairs only.
		qui use "`core2'", clear
		qui joinby cty_fips using "`core3'"

		* Collapse to get the synthetic unit estimates for the treated unit and the donor pool units and save
		qui collapse (mean) p_`y'_synthetic=p_`y' [aw=_W_Weight], by(county_fips tm)
		qui rename county_fips cty_fips
		qui save "`core2'", replace

		* Merge into core data for the treated unit, and save
		qui use "`core'", clear
		qui keep if naics == `n'
		qui keep cty_fips tm p_`y' pop10
		qui rename p_`y' p_`y'_treated
		qui merge 1:1 cty_fips tm using "`core2'", nogen norep
		qui rename tm _time
		qui gen p_gap = p_`y'_treated - p_`y'_synthetic
		qui gen trunit = `c'
		qui gen trperiod = `trp'
		qui save "$projdir/dta/analysis/qcew/`y'/naics`n'/California/corrected/tr_`c'", replace
	}
}

*---------------------------------------------------------------------------
* New York 
*---------------------------------------------------------------------------

* Create the output folders for each outcome and erase the .dta results left
* over from earlier runs, so stale tr_*.dta files are never mixed with new ones.
foreach y in employment avg_wkly_wage {
	local n = 722513
	local outdir "$projdir/dta/analysis/qcew/`y'/naics`n'/NY"

	* Ensure necessary folders exist. Each level is created in turn; capture
	* ignores the error raised when a folder is already there.
	capture mkdir "$projdir/dta/analysis/qcew"
	capture mkdir "$projdir/dta/analysis/qcew/`y'"
	capture mkdir "$projdir/dta/analysis/qcew/`y'/naics`n'"
	capture mkdir "`outdir'"
	capture mkdir "`outdir'/corrected"

	* Clear the old .dta files (if any), first in the main folder and then in
	* the "corrected" folder
	di "Erasing previously saved outcome data"
	di "..."
	di
	foreach d in "`outdir'" "`outdir'/corrected" {
		local filelist: dir "`d'" files "*.dta"
		* Compound quotes: the file names in `filelist' are themselves quoted
		di `"Filelist: `filelist'"'
		foreach f of local filelist {
			qui erase "`d'/`f'"
		}
	}
}


* Create tempfiles (fresh scratch datasets for the New York section)
tempfile core core0 core1 core2 core3 core4

* Set start and end quarters. tm is formatted %tq below, so these are Stata
* quarterly dates (quarters since 1960q1).
local trp = 216                            // treatment quarter passed to trperiod(): 2014q1
local start_tm = 199                       // first quarter kept: 2009q4
local end_tm = 251                         // last quarter kept: 2022q4
local tot_tm = `end_tm' - `start_tm' + 1   // quarters a unit needs for a balanced panel
local endqtrlab "2022Q4"

* Ensure distinct package is installed. Only contact SSC when the command is
* actually missing, and let a failed install stop the run here rather than at
* the first call to distinct.
capture which distinct
if _rc {
	ssc install distinct
}

* Load the data
use "$projdir/dta/build/cln/analysis_panel_cty.dta", clear

* Rename key variables:
* quarterly_date -> tm, earnings -> demean_lnearn, lnemp -> demean_lnemp, ind -> naics
qui rename (quarterly_date earnings lnemp ind) (tm demean_lnearn demean_lnemp naics)
format tm %tq

* Generate unique county id and drop obs which don't observe it
* (obs with missing or zero wages or employment are dropped as well)
qui gen cty_fips = 1000*statefips + countyfips
qui keep if !mi(cty_fips) & !mi(avg_wkly_wage) & !mi(employment) & avg_wkly_wage != 0 & employment != 0
* Sanity checks on the filter above: ids, wages and employment are nonzero and nonmissing
assert cty_fips
assert avg_wkly_wage
assert employment
assert !mi(avg_wkly_wage)
assert !mi(employment)

* Recode the older restaurant NAICS code 722211 to 722513 in observations with
* year <= 2011 (i.e. through 2011, not only pre-2011)
qui recode naics (722211=722513) if year <= 2011

* Keep NAICS 10, 722, and 722513
qui keep if inlist(naics, 10, 722, 722513)

* Sort
sort cty_fips tm naics

* Restrict to desired time-period
keep if inrange(tm, `start_tm', `end_tm')

* Identify treated and donor counties: treated = every county with statefips == 36
levelsof cty_fips if statefips == 36, local(trcty)
* Comma-separated copy of the treated list, for use inside inlist()
local trctylist = subinstr("`trcty'", " ", ",",.)
qui distinct cty_fips if inlist(cty_fips, `trctylist')
local trcty_count = r(ndistinct)

* Generate donor variable (1 = not in the treated list)
qui gen donor = !inlist(cty_fips, `trctylist')

* Restrict to donor counties with no min wage changes (and the treated counties)
* A county has "no change" when loc_max_mw has the same min and max over its obs.
qui egen double max_loc_max = max(loc_max_mw), by(cty_fips)
qui egen double min_loc_max = min(loc_max_mw), by(cty_fips)
* x flags NAICS 722 obs with min_emp >= 5000; empkeep (below) spreads it to the county
* NOTE(review): a missing min_emp also satisfies >= 5000 in Stata - confirm min_emp is never missing
qui gen x = (naics == 722 & min_emp >= 5000)
qui drop if cty_fips == 13121 // Fulton County, GA has an issue in the raw earnings data
qui drop if cty_fips == 36029 //  Erie County has a weird jump in the earnings data 
bysort cty_fips: egen empkeep = max(x)
qui keep if (max_loc_max == min_loc_max | donor == 0) & empkeep == 1
* When the sample runs past tm 239 (2019q4), drop all counties in state FIPS 25 and 51
* NOTE(review): the reason for excluding these two states is not documented here
if `end_tm' > 239 {
	qui drop if inlist(floor(cty_fips/1000), 25, 51)
}

* Store the restricted county panel so it can be collapsed in two different ways
quietly compress
quietly save "`core'", replace

* Pass 1: apply the county change adjustments, then take the employment-weighted
* mean of avg_wkly_wage (and of donor, year, qtr) by county, quarter and industry
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (mean) avg_wkly_wage donor year qtr [aweight = employment], by(cty_fips tm naics)
* donor must still be exactly 0 or 1 after averaging
assert inlist(donor, 0, 1)
quietly save "`core2'", replace

* Pass 2: start again from the stored panel, apply the same adjustments, and
* total employment and pop10 over the same cells
quietly use "`core'", clear
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (sum) employment pop10, by(cty_fips tm naics)

* Bring the two passes together
quietly merge 1:1 cty_fips tm naics using "`core2'", nogenerate noreport

* Balanced panel only: a county-industry must be observed in every quarter
* between `start_tm' and `end_tm'
quietly keep if inrange(tm, `start_tm', `end_tm')
tempvar n_obs
by cty_fips naics, sort: generate `n_obs' = _N
quietly keep if `n_obs' == `tot_tm'
quietly drop `n_obs'

* Attach the LAUS data by county-year, keeping matched observations only
quietly merge m:1 cty_fips year using "$projdir/dta/build/cln/laus_cty_NYCnocombine.dta", nogenerate noreport keep(3)

* Attach the Covid-impact index: park the panel, build a county-level index
* file (average of workplace and retail), then merge it back in
quietly save "`core'", replace
quietly use "$projdir/dta/build/cln/covid_index.dta", clear
quietly rename countyfips cty_fips
quietly generate covid_index = (workplace + retail)/2
quietly keep cty_fips covid_index
quietly save "`core2'", replace
quietly use "`core'", clear
merge m:1 cty_fips using "`core2'", nogenerate noreport keep(3)

* Copy NAICS 10 employment onto every industry row of the same county-quarter
* as emp10, then discard the NAICS 10 rows themselves
tempvar emp_all
quietly generate `emp_all' = employment if naics == 10
by cty_fips tm, sort: egen emp10 = max(`emp_all')
quietly drop `emp_all'
quietly drop if naics == 10

* Retain only the analysis variables
quietly keep cty_fips tm naics avg_wkly_wage employment emp10 pop10 unr covid_index donor

* New York is analysed for fast food (NAICS 722513) only; `n' is reused by the
* synthetic control loop further down
local n = 722513
quietly keep if naics == `n'

* Store the result
quietly save "`core0'", replace

* Load and restrict
qui use "`core0'", clear

* Put in % of 2013q4 levels: each county's own value in the quarter before
* treatment (tm 215 = `trp' - 1) is set to 100
local base_tm = `trp' - 1
local pre_end = 207         // last quarter of the predictor window (2011q4)
foreach v in employment avg_wkly_wage emp10 {
	qui gen x = `v' if tm == `base_tm'
	qui bysort cty_fips: egen xx = max(x)
	qui replace `v' = 100*`v'/xx
	qui drop x xx
}

* Reshape data to debias it: for every quarter of the predictor window,
* spread that quarter's value to all rows of the county (emp_t, earn_t, emp10_t)
local predvars ""
forval t = `start_tm'/`pre_end' {
	foreach pair in "employment emp" "avg_wkly_wage earn" "emp10 emp10" {
		local src  : word 1 of `pair'
		local stub : word 2 of `pair'
		qui gen x = `src' if tm == `t'
		qui bysort cty_fips: egen `stub'_`t' = max(x)
		qui drop x
		local predvars "`predvars' `stub'_`t'"
	}
}
* County means over the same window, again spread to all rows
foreach v in employment avg_wkly_wage emp10 unr {
	qui bysort cty_fips: egen x = mean(`v') if inrange(tm, `start_tm', `pre_end')
	qui bysort cty_fips: egen mean_`v' = max(x)
	qui drop x
	local predvars "`predvars' mean_`v'"
}

* "Covid- and bias-correct" the outcomes of interest: in each quarter, regress
* the outcome on the predictors and covid_index using donor counties only, then
* store the residual for every county (treated ones included, since predict is
* not restricted to the estimation sample)
qui gen p_employment = .
qui gen p_avg_wkly_wage = .
qui levelsof tm, local(Time)
foreach v in employment avg_wkly_wage {
	foreach t of local Time {
		qui reg `v' `predvars' covid_index [aw=pop10] if tm == `t' & donor == 1
		qui predict p_`v'_`t', resid
		qui replace p_`v' = p_`v'_`t' if tm == `t'
		qui drop p_`v'_`t'
	}
}
		
* Keep key variables
qui keep cty_fips tm naics employment avg_wkly_wage p_* emp10 pop10 unr donor
		
* Compress, sort, and save
qui compress
qui sort cty_fips tm
qui save "`core'", replace

* Get the synthetic control weights for each treated county.
* For every treated county c and outcome y this (1) runs allsynth4 on county c
* plus the donor pool and keeps its output in NY/tr_c.dta, then (2) applies the
* positive donor weights from that output to the corrected outcome p_y and
* saves treated vs. synthetic series in NY/corrected/tr_c.dta.
qui use "`core'", clear
qui levelsof cty_fips if donor == 0, local(trcty)
	
foreach c of local trcty {
	foreach y in employment avg_wkly_wage {
		* Same industry as set when the panel was restricted to fast food
		local n = 722513
			
		* Re-load the data
		qui use "`core'", clear
						
		* Restrict to selected sample of counties observed for NAICS `n', and ensure a balanced panel
		qui keep if naics == `n'
		qui keep if cty_fips == `c' | donor == 1
		bysort cty_fips: gen N = _N
		qui keep if N == `tot_tm'
		drop N
		qui distinct cty_fips if donor == 1
		local donorct = r(ndistinct)
		qui distinct cty_fips if inlist(cty_fips, `trctylist')
		di "Balanced panel exists for `r(ndistinct)' of `trcty_count' treated counties and `donorct' donor pool counties"
						
		* Create population-based weights for the averaging
		* NOTE(review): popwt is not referenced again in this file; kept in case allsynth4 relies on it
		qui gen x = pop10
		bysort cty_fips: egen popwt = max(x)
				
		* Create the list of pre-treatment outcome period predictors: one term
		* per quarter from `start_tm' to `predend' for the outcome and for emp10,
		* plus the emp10 term over the whole window
		local predend = 207
		local normemp "emp10"
		local ypreds
		local emp10preds
		forval t = `start_tm'/`predend' {
			local ypreds "`ypreds' `y'(`t')"
			local emp10preds "`emp10preds' emp10(`t')"
		}
		local emp10preds "`emp10preds' emp10(`start_tm'(1)`predend')"
	
		* tsset
		format tm %tq
		tsset cty_fips tm, quarterly
		qui gen treated = (donor == 0) 
		* Treatment quarter for the treated county, 0 for donors
		qui gen trp = `trp'*treated
					
		#delimit ;
			allsynth4
				`y' `ypreds' `y'(`start_tm'(1)`predend') unr(`start_tm'(4)`predend') `emp10preds', 
				trunit(`c') trperiod(`trp') 
				bcorrect(merge) 
				transform(`y' `normemp', normalize)
				pvalues(rmspe)
				keep($projdir/dta/analysis/qcew/`y'/naics`n'/NY/tr_`c') replace;
			#delimit cr
						
		* Load the estimates data
		qui use "$projdir/dta/analysis/qcew/`y'/naics`n'/NY/tr_`c'", clear
					
		* Get the weights: one row per (unit, donor) pair with a positive weight.
		* After the renames, county_fips is the unit being synthesized and
		* cty_fips is the donor county carrying the weight.
		qui keep cty_fips _Co_Number _W_Weight
		qui keep if !mi(_W_Weight) & _W_Weight > 0
		qui rename cty_fips county_fips
		qui rename _Co_Number cty_fips
		* Each donor may appear at most once per unit
		isid county_fips cty_fips
		qui save "`core2'", replace

		* Outcome panel for NAICS `n'
		qui use "`core'", clear
		qui keep if naics == `n'
		qui keep cty_fips tm `y' p_`y'
		qui save "`core3'", replace

		* Pair every (unit, donor) weight with that donor's full time series.
		* joinby keeps matched pairs only.
		qui use "`core2'", clear
		qui joinby cty_fips using "`core3'"

		* Collapse to get the synthetic unit estimates for the treated unit and the donor pool units and save
		qui collapse (mean) p_`y'_synthetic=p_`y' [aw=_W_Weight], by(county_fips tm)
		qui rename county_fips cty_fips
		qui save "`core2'", replace

		* Merge into core data for the treated unit, and save
		qui use "`core'", clear
		qui keep if naics == `n'
		qui keep cty_fips tm p_`y' pop10
		qui rename p_`y' p_`y'_treated
		qui merge 1:1 cty_fips tm using "`core2'", nogen norep
		qui rename tm _time
		qui gen p_gap = p_`y'_treated - p_`y'_synthetic
		qui gen trunit = `c'
		qui gen trperiod = `trp'
		qui save "$projdir/dta/analysis/qcew/`y'/naics`n'/NY/corrected/tr_`c'", replace
	}
}

* Close the log (the named log "sublog" opened at the top of this file)
log close sublog
